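# Download a Youku movie: fetch its HLS (.m3u8) playlist, then fetch every
# segment URL listed in it and append the segments to a single .ts file.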
import requests
import re


class YouKU(object):
    """Download a video described by an HLS (``.m3u8``) playlist.

    ``run()`` fetches the playlist, downloads every ``https://`` URL found in
    it (in order) and appends the bytes to ``电影.ts``, then appends the
    playlist text itself to ``优酷.json``.
    """

    # Playlist fetched by ``run()`` when the caller does not pass a URL.
    # NOTE(review): the query string carries what look like time-limited
    # tokens (``ups_ts``, ``expire``, ``vkey``) -- presumably this exact URL
    # no longer works; pass a fresh one to ``run(url)``.
    DEFAULT_URL = 'https://valipl.cp31.ott.cibntv.net/69752C607094D71FE7FAD63AE/03000500005DF8F74C8BB7800000009114372E-BA50-426C-9CB1-F75A492C6B9D-1-114.m3u8?ccode=0502&duration=6006&expire=18000&psid=584f188f7048c2b0dce905caba18f239&ups_client_netip=&ups_ts=1584760061&ups_userid=&utid=zC3yFsZLQTUCAatzedJtL2Th&vid=XMzI0NzMxMjcxMg&vkey=Ba4322f7f4a2f31bad5acac4033736fda&sm=1&operate_type=1&dre=u37&si=73&eo=0&dst=1&iv=0&s=efbfbd520cefbfbd6774&bc=2'

    def __init__(self, timeout=30):
        """Store the request headers and timeout used for every HTTP call.

        Args:
            timeout: Value passed as ``timeout=`` to each ``requests.get``
                call, so a stalled connection raises instead of hanging.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
        self.timeout = timeout

    def parse_html(self, url):
        """Fetch *url* and return the response body decoded as UTF-8 text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(url=url, headers=self.headers,
                                timeout=self.timeout)
        # Fail loudly instead of treating an error page as a playlist.
        response.raise_for_status()
        return response.content.decode()

    def save_html(self, html):
        """Append the playlist text *html* to ``优酷.json``."""
        # Explicit UTF-8: the default text encoding is platform dependent and
        # may be unable to represent characters decoded from the response.
        with open("优酷.json", "a", encoding="utf-8") as f:
            f.write(html)

    @staticmethod
    def _segment_urls(html):
        """Return every ``https://`` URL in *html*, in order of appearance."""
        # '.' stops at '\n' but not at '\r', so a CRLF playlist would leave a
        # trailing carriage return on each URL; strip() removes it.
        return [match.strip() for match in re.findall(r"https://.*", html)]

    def download(self, html):
        """Download every URL listed in *html* and append it to ``电影.ts``.

        The output file is opened in append mode, so running this twice
        appends a second copy of the data to the same file.

        Raises:
            requests.HTTPError: If any segment request returns an error
                status; segments written before the failure stay on disk.
        """
        urls = self._segment_urls(html)
        if not urls:
            # Nothing to fetch; avoid creating an empty output file.
            return

        total = len(urls)
        # Open the output once rather than once per segment.
        with open("电影.ts", 'ab') as f:
            for num, segment_url in enumerate(urls, start=1):
                # Progress is "current segment / number of segments".
                print("正在下载{}/{}...".format(num, total))
                response = requests.get(segment_url, headers=self.headers,
                                        timeout=self.timeout)
                # Never append an HTTP error body to the video file.
                response.raise_for_status()
                f.write(response.content)

    def run(self, url=None):
        """Fetch the playlist, download its segments, then save the playlist.

        Args:
            url: Playlist URL to download. Defaults to ``DEFAULT_URL``.
        """
        if url is None:
            url = self.DEFAULT_URL
        html = self.parse_html(url)
        self.download(html)
        self.save_html(html)


if __name__ == "__main__":
    # Script entry point: build the downloader and run it once. Nothing
    # happens when this module is merely imported.
    YouKU().run()
